#-------------------------------------------------------------------------------
#
# Electoral predictors - fit incumbency model
#
# Author: Sina Chen
#
#-------------------------------------------------------------------------------


# Libraries ---------------------------------------------------------------

{
  library(dplyr)
  library(rstan)
  rstan_options(auto_write = TRUE)
  options(mc.cores = parallel::detectCores())
}

gc(verbose = T)


# Data --------------------------------------------------------------------

polls <- readRDS("data/us_senate_polls1990_2022_final.RDS")


# Preparation -------------------------------------------------------------

polls <- polls %>%
  filter(dte < 101) %>%                               # exclude polls conducted more than 100 days before election day
  group_by(state, cycle) %>%                          # compute election groups 
  mutate(n_poll = n()) %>% 
  ungroup() %>% 
  filter(n_poll >= 5) %>%                             # exclude elections with less than 5 polls
  mutate(state_year = paste0(state, cycle),
         cycle = as.integer(cycle),
         state_year_int = as.integer(as.factor(state_year)),
         t_sc = as.numeric(dte)/max(as.numeric(dte)), # scale days to election 0-1
         inc_dem = if_else(inc == "Dem. incumbent", 1, 0),
         inc_rep = if_else(inc == "Rep. incumbent", 1, 0))

# election-level data 
vote_sy <- polls %>%
  group_by(state_year, cycle,  state, vote2_rep, inc_dem, inc_rep) %>%
  summarise() %>%
  ungroup()

# Stan data 
stan_dat <- list(
  N = nrow(polls),                             # number of polls
  SY = length(unique(polls$state_year)),       # number of elections 

  poll = polls$pct2_rep,                       # two-party poll share
  vote = vote_sy$vote2_rep,                    # two-party vote share
  
  feature_dem = vote_sy$inc_dem,               # election-level feature Dem.
  feature_rep = vote_sy$inc_rep,               # election-level feature Rep.
  t = polls$t_sc,                              # scaled days to election
  
  sample_size = polls$sample_size * 
    (polls$vote_rep + polls$vote_dem),         # sample size adjusted for Rep. & Dem. poll share
  
  sy_id = polls$state_year_int                 # election id

)

# check Stan data
sapply(stan_dat, length)
sapply(stan_dat, range)


# Fit Stan model ----------------------------------------------------------

resStan <- stan(file = "code/fit_stan/stan_ml/ml_senate_bias_context_cat.stan", 
                data = stan_dat,
                chains = 4, iter = 5000,
                control = list(adapt_delta = 0.95, max_treedepth = 12)
) 

# save simulation results
saveRDS(resStan, 'code/fit_stan/resStan_us_senate_context1990_2022_inc.RDS')
